Lab 2 Style Transfer

Xingming Qu, Bowei Tian

1.Manipulate the above VGG code to use un-pooling layers as in the Photo Realistic Li et al. paper. Alternatively, you can use another implementation (perhaps not using Keras) if you like.

In this lab, we will use the implementation from https://github.com/ykamikawa/tf-keras-SegNet/blob/master/layers.py

In [1]:
from keras import backend as K
from keras.layers import Layer
from keras.models import Model, Sequential, load_model
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, Input
from keras.utils.data_utils import get_file
import keras.backend as K
import h5py
import numpy as np
import tensorflow as tf
from keras.models import Model, Sequential, load_model
import keras.backend as K
from vgg import VGG19, preprocess_input
from decoder import decoder_layers
import sys
import os
from vgg import VGG19, preprocess_input
from decoder import decoder_layers
import sys
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import Callback
from scipy.misc import imresize, imsave
import numpy as np
from model import EncoderDecoder
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import Callback
from scipy.misc import imresize, imsave

## create customization layer
class MaxPoolingWithArgmax2D(Layer):
    """Max-pooling layer that also returns the argmax (pooling mask).

    Follows the Keras custom-layer source code protocol (``__init__``,
    ``call``, ``compute_output_shape``, ``compute_mask``).  The mask
    output is consumed by ``MaxUnpooling2D`` to scatter values back to
    the positions they were pooled from, as in SegNet / the Li et al.
    photo-realistic style-transfer paper.
    """

    ## initialization
    def __init__(
            self,
            pool_size=(2, 2),
            strides=(2, 2),
            padding='same',
            **kwargs):
        super(MaxPoolingWithArgmax2D, self).__init__(**kwargs)
        self.padding = padding
        self.pool_size = pool_size
        self.strides = strides

    # this is where the layer's logic lives
    def call(self, inputs, **kwargs):
        """Pool ``inputs`` and return ``[output, argmax]``.

        Only the TensorFlow backend is supported, because it relies on
        the raw ``tf.nn.max_pool_with_argmax`` op.
        """
        padding = self.padding
        pool_size = self.pool_size
        strides = self.strides
        if K.backend() == 'tensorflow':
            # build NHWC kernel/stride specs for the raw TF op
            ksize = [1, pool_size[0], pool_size[1], 1]
            padding = padding.upper()
            strides = [1, strides[0], strides[1], 1]

            # this op does the max pooling AND records, per pooled value,
            # the flattened index it came from (the pooling mask)
            output, argmax = K.tf.nn.max_pool_with_argmax(
                    inputs,
                    ksize=ksize,
                    strides=strides,
                    padding=padding)
        else:
            errmsg = '{} backend is not supported for layer {}'.format(
                    K.backend(), type(self).__name__)
            raise NotImplementedError(errmsg)
        # cast so the integer mask can flow through Keras as a float tensor
        argmax = K.cast(argmax, K.floatx())
        return [output, argmax]

    # in case your layer modifies the shape of its input, you should specify here the shape transformation logic.
    # This allows Keras to do automatic shape inference.
    def compute_output_shape(self, input_shape):
        # BUGFIX: the downsampling ratio was hard-coded to (1, 2, 2, 1),
        # silently ignoring non-default pool_size/strides.  Derive it from
        # the configured strides instead (identical for the default (2, 2)).
        ratio = (1, self.strides[0], self.strides[1], 1)
        output_shape = [
                dim//ratio[idx]
                if dim is not None else None
                for idx, dim in enumerate(input_shape)]
        output_shape = tuple(output_shape)
        # one shape for the pooled output, one for the argmax mask
        return [output_shape, output_shape]

    def compute_mask(self, inputs, mask=None):
        # neither output carries a Keras mask
        return 2 * [None]

    def get_config(self):
        # make the custom layer serializable with model.save()/load_model()
        config = super(MaxPoolingWithArgmax2D, self).get_config()
        config.update({
            'pool_size': self.pool_size,
            'strides': self.strides,
            'padding': self.padding,
        })
        return config

class MaxUnpooling2D(Layer):
    """Inverse of ``MaxPoolingWithArgmax2D``.

    Takes ``[pooled_features, argmax_mask]`` and scatters each pooled
    value back to the exact position it was pooled from, leaving every
    other output position zero (this zero filling is what produces the
    grainy look discussed later in the notebook).
    """

    def __init__(self, size=(2, 2), **kwargs):
        super(MaxUnpooling2D, self).__init__(**kwargs)
        # upsampling factor per spatial dimension
        self.size = size

    def call(self, inputs, output_shape=None):
        """Scatter ``inputs[0]`` into an upsampled tensor using ``inputs[1]``."""
        # get input and pooling mask
        updates, mask = inputs[0], inputs[1]
        with K.tf.variable_scope(self.name):
            # the mask was cast to float to travel through Keras;
            # convert it back to integer indices
            mask = K.cast(mask, 'int32')
            # dynamic shape of the pooled tensor, a (4,) int vector
            input_shape = K.tf.shape(updates, out_type='int32')
            # calculate the new (unpooled) shape:
            # only height [1] and width [2] grow, by self.size
            if output_shape is None:
                output_shape = (
                        input_shape[0],
                        input_shape[1]*self.size[0],
                        input_shape[2]*self.size[1],
                        input_shape[3])
            self.output_shape1 = output_shape

            # Recover (batch, y, x, feature) coordinates for every value.
            # NOTE(review): this assumes the argmax encodes flattened
            # indices of the form (y*W + x)*C + f, per batch element
            # (TF's default, batch not included) — confirm against the
            # tf.nn.max_pool_with_argmax docs for the TF version in use.
            one_like_mask = K.ones_like(mask, dtype='int32')
            batch_shape = K.concatenate(
                    [[input_shape[0]], [1], [1], [1]],
                    axis=0)
            batch_range = K.reshape(
                    K.tf.range(output_shape[0], dtype='int32'),
                    shape=batch_shape)
            b = one_like_mask * batch_range
            y = mask // (output_shape[2] * output_shape[3])
            x = (mask // output_shape[3]) % output_shape[2]
            feature_range = K.tf.range(output_shape[3], dtype='int32')
            f = one_like_mask * feature_range

            # stack coordinates into an (N, 4) index list, flatten the
            # values, and scatter them into a zero-initialized tensor
            updates_size = K.tf.size(updates)
            indices = K.transpose(K.reshape(
                K.stack([b, y, x, f]),
                [4, updates_size]))
            values = K.reshape(updates, [updates_size])
            ret = K.tf.scatter_nd(indices, values, output_shape)
            return ret

    def compute_output_shape(self, input_shape):
        """Static shape: the mask's shape scaled by ``self.size`` in H and W."""
        mask_shape = input_shape[1]
        return (
                mask_shape[0],
                mask_shape[1]*self.size[0],
                mask_shape[2]*self.size[1],
                mask_shape[3]
                )
Using TensorFlow backend.

We will use the dataset from https://github.com/yuweiming70/Landscape-Dataset, which includes 7268 320x180 landscape photos
We will train two models with unpooling.

In [2]:
# We create two models:
# the first one is a 2-layer/2-layer encoder-decoder
# the second one is a 4-layer/4-layer encoder-decoder
def first_model():
    """Build the 2-block encoder / 2-block decoder with argmax unpooling.

    Input and output are 256x256 RGB images; the decoder mirrors the
    encoder, restoring spatial positions via the saved pooling masks.
    """
    img_in = Input(shape=(256, 256, 3))
    pool = (2, 2)

    # ----- encoder -----
    # Block 1
    feat = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_in)
    feat = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(feat)
    feat, mask_1 = MaxPoolingWithArgmax2D(pool)(feat)

    # Block 2
    feat = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(feat)
    feat = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(feat)
    feat, mask_2 = MaxPoolingWithArgmax2D(pool)(feat)

    # ----- decoder (mirror image of the encoder) -----
    feat = MaxUnpooling2D(pool)([feat, mask_2])
    feat = Conv2D(128, (3, 3), activation='relu', padding='same', name='decoder_block2_conv2')(feat)
    feat = Conv2D(64, (3, 3), activation='relu', padding='same', name='decoder_block2_conv1')(feat)

    feat = MaxUnpooling2D(pool)([feat, mask_1])
    feat = Conv2D(64, (3, 3), activation='relu', padding='same', name='decoder_block1_conv2')(feat)
    img_out = Conv2D(3, (3, 3), activation='relu', padding='same', name='decoder_out')(feat)

    return Model(img_in, img_out, name='decodermy')
    
def second_model():
    """Build the 4-block encoder / 4-block decoder with argmax unpooling.

    VGG-style encoder (64/128/256/512 filters) whose pooling masks are
    fed to the matching unpooling layers of a mirrored decoder.  Layer
    names and creation order match the hand-written original exactly so
    saved weights still load.
    """
    img_in = Input(shape=(256, 256, 3))
    pool = (2, 2)

    # ----- encoder: (filters, number of 3x3 convs) per block -----
    enc_spec = [(64, 2), (128, 2), (256, 3), (512, 3)]
    feat = img_in
    masks = []
    for block_idx, (filters, n_convs) in enumerate(enc_spec, start=1):
        for conv_idx in range(1, n_convs + 1):
            feat = Conv2D(filters, (3, 3), activation='relu', padding='same',
                          name='block%d_conv%d' % (block_idx, conv_idx))(feat)
        feat, mask = MaxPoolingWithArgmax2D(pool)(feat)
        masks.append(mask)

    # ----- decoder: per block, (conv-name suffix, filters) in order -----
    # (block 1's single conv keeps the original's 'conv2' suffix)
    dec_spec = [
        (4, [('conv3', 512), ('conv2', 512), ('conv1', 256)]),
        (3, [('conv3', 256), ('conv2', 256), ('conv1', 128)]),
        (2, [('conv2', 128), ('conv1', 64)]),
        (1, [('conv2', 64)]),
    ]
    for block_idx, convs in dec_spec:
        feat = MaxUnpooling2D(pool)([feat, masks[block_idx - 1]])
        for suffix, filters in convs:
            feat = Conv2D(filters, (3, 3), activation='relu', padding='same',
                          name='decoder_block%d_%s' % (block_idx, suffix))(feat)

    img_out = Conv2D(3, (3, 3), activation='relu', padding='same', name='decoder_out')(feat)

    return Model(img_in, img_out, name='decodermy')
In [3]:
# Training configuration for the Landscape dataset
TRAIN_PATH = 'data'          # root folder scanned by flow_from_directory
TARGET_SIZE = (256, 256)     # resize every image to the models' input size
BATCH_SIZE = 16

# Create ImageDataGenerator (images only: class_mode=None).
# NOTE(review): create_gen() below builds its own generator, so this
# particular `gen` is shadowed by the later `gen = create_gen(...)` cell.
datagen = ImageDataGenerator()
gen = datagen.flow_from_directory(TRAIN_PATH, target_size=TARGET_SIZE,
                                  batch_size=BATCH_SIZE, class_mode=None)

def create_gen(img_dir, target_size, batch_size):
    """Yield (X, y) training pairs for the autoencoders.

    Wraps a directory-iterating Keras generator; since the networks are
    trained to reconstruct their input, each image serves as both the
    input and the target.  Batches smaller than `batch_size` (the final
    partial batch of an epoch) are skipped.
    """
    datagen = ImageDataGenerator()
    flow = datagen.flow_from_directory(img_dir, target_size=target_size,
                                       batch_size=batch_size, class_mode=None)

    def tuple_gen():
        for batch in flow:
            # only full batches; the image is both X and y
            if batch.shape[0] == batch_size:
                yield (batch, batch)

    return tuple_gen()

# This needs to be in scope where model is defined
class OutputPreview(Callback):
    """Keras callback that saves a preview reconstruction during training.

    Every `increment` batches, the (fixed) test image is pushed through
    the model and the prediction is written to
    ``<preview_dir_path>/<iteration>.jpg`` so training progress can be
    inspected visually.
    """

    def __init__(self, model, test_img_path, increment, preview_dir_path):
        # BUGFIX: the base Callback constructor was never invoked, so the
        # base class's internal state was left uninitialized.
        super(OutputPreview, self).__init__()
        # load and resize the fixed preview image, then add a batch dim
        test_img = image.load_img(test_img_path)
        test_img = imresize(test_img, (256, 256, 3))
        test_target = image.img_to_array(test_img)
        test_target = np.expand_dims(test_target, axis=0)
        self.test_img = test_target
        self.model = model

        self.preview_dir_path = preview_dir_path

        self.increment = increment   # save every `increment` batches
        self.iteration = 0           # running batch counter

    def on_batch_end(self, batch, logs=None):
        # BUGFIX: mutable default argument `logs={}` replaced with None
        # (Keras always passes logs explicitly, so behavior is unchanged).
        if (self.iteration % self.increment == 0):
            output_img = self.model.predict(self.test_img)[0]
            fname = '%d.jpg' % self.iteration
            out_path = os.path.join(self.preview_dir_path, fname)
            imsave(out_path, output_img)
        self.iteration += 1
Found 7268 images belonging to 1 classes.
In [4]:
# Build the (X, y) autoencoder generator and derive steps per epoch
gen = create_gen(TRAIN_PATH, TARGET_SIZE, BATCH_SIZE)
num_samples = 7268  # total images in the Landscape dataset
steps_per_epoch = num_samples // BATCH_SIZE
Found 7268 images belonging to 1 classes.
### Already finished training this model! No need to run it again!
In [5]:
# encoder_decoder =first_model()
# encoder_decoder.compile(optimizer='adam', loss='mse')
# # encoder_decoder.summary()

# target_layer=2
# callbacks = [OutputPreview(encoder_decoder, 'data/land/0.jpg', 300, 'upout/%d' % target_layer)]
# epochs = 10
# encoder_decoder.fit_generator(gen, steps_per_epoch=steps_per_epoch,
#         epochs=epochs, callbacks=callbacks)
# encoder_decoder.save('first_model.h5')
### Already finished training this model! No need to run it again!
In [6]:
# encoder_decoder =second_model()
# encoder_decoder.compile(optimizer='adam', loss='mse')
# # encoder_decoder.summary()

# target_layer=4
# callbacks = [OutputPreview(encoder_decoder, 'data/land/3.jpg', 300, 'upout/%d' % target_layer)]
# epochs = 10
# encoder_decoder.fit_generator(gen, steps_per_epoch=steps_per_epoch,
#         epochs=epochs, callbacks=callbacks)
# encoder_decoder.save('second_model.h5')

I also tried to train two encoder-decoders using upsampling. But I find the sample VGG code seems not correct. The encoder and decoder were not symmetrical. I modified the vgg.py and decoder.py. I will upload them in the canvas. Again, I trained the 2layer-2layer encoder-decoder and 4layer-4layer encoder-decoder. The difference between these two and the previous two are the unpooling/upsampling layer. You can find the training detail at Attachment1.html

So there are 4 models in total:

1.Unpooling 2 layer encoder_decoder

2.Unpooling 4 layer encoder_decoder

3.Upsampling 2 layer encoder_decoder

4.Upsampling 4 layer encoder_decoder

3. Show a few images and their reconstructions using each decoder. Comment on any artifacts from the images.

In [7]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import warnings
warnings.filterwarnings("ignore")

# input a image then pass it to the model to get reconstruction
# input an image, feed it through the model, and return the reconstruction
def get_reconstruction(model, img_path):
    """Load `img_path`, run it through `model`, return a displayable array."""
    arr = mpimg.imread(img_path)
    # preprocessing: resize to the networks' 256x256 input size
    arr = cv2.resize(arr, (256, 256))
    # add the batch dimension expected by predict()
    batch = np.expand_dims(arr, axis=0)
    # reconstruct, then drop the batch dimension again
    recon = np.squeeze(model.predict(batch))
    # integer pixels, clipped into the displayable 0-255 range
    recon = recon.astype(int)
    recon = np.clip(recon, 0, 255)
    return recon
In [8]:
from keras.models import load_model
import cv2

# Load the 4 trained models (weights were saved by the training cells above)
# m1 = unPooling 2 layer
m1=first_model()
m1.load_weights('first_model.h5')

# m2 = unPooling 4 layer
m2=second_model()
m2.load_weights('second_model.h5')

# encoder_decoder_2 = Upsampling 2 layer (modified vgg.py/decoder.py model)
encoder_decoder_2 = EncoderDecoder(target_layer=2)
encoder_decoder_2.encoder.load_weights('encoder_2.h5')
encoder_decoder_2.decoder.load_weights('decoder_2.h5')

# encoder_decoder_4 = Upsampling 4 layer
encoder_decoder_4 = EncoderDecoder(target_layer=4)
encoder_decoder_4.encoder.load_weights('encoder_4.h5')
encoder_decoder_4.decoder.load_weights('decoder_4.h5')

def show_result(image):
    """Plot an image and its reconstruction under all four autoencoders.

    One figure for the original, then two 1x2 figures: the unpooling
    pair (2-layer, 4-layer) followed by the upsampling pair.
    """
    origin = mpimg.imread(image)
    plt.figure(figsize=(8, 6))
    plt.imshow(origin)
    plt.title('Original img')

    # (model, caption) in display order; a new figure every two panels
    panels = [
        (m1, 'using unPooling 2 layer encoder_decoder'),
        (m2, 'using unPooling 4 layer encoder_decoder'),
        (encoder_decoder_2.model, 'using Upsampling 2 layer encoder_decoder'),
        (encoder_decoder_4.model, 'using Upsampling 4 layer encoder_decoder'),
    ]
    for idx, (net, caption) in enumerate(panels):
        if idx % 2 == 0:
            plt.figure(figsize=(11, 11))
        plt.subplot(1, 2, idx % 2 + 1)
        plt.imshow(get_reconstruction(net, image))
        plt.title(caption)
In [9]:
im='data/land/3.jpg'
show_result(im)
im='data/land/2.jpg'
show_result(im)
im='data/land/50.jpg'
show_result(im)

I think unPooling 2 and Upsampling 2 are both good!

unPooling 2 layer encoder_decoder VS Upsampling 2 layer encoder_decoder

The reconstruction of unPooling 2 layer encoder_decoder has some graininess(it might because it uses 0 padding when unpooling), which means it is not as smooth as Upsampling 2 layer encoder_decoder. So I think Upsampling 2 layer encoder_decoder is better

unPooling 2 layer encoder_decoder VS unPooling 4 layer encoder_decoder

Unpooling 2 layer encoder_decoder is much better. It seems like the unPooling 4 layer model is still underfitting. Because the 4-layer model has more parameters, it needs more epochs to converge.

Upsampling 4 layer encoder_decoder VS unPooling 4 layer encoder_decoder

It is obvious that both Upsampling 4 and unPooling 4 did not converge. Compared to Upsampling 4, unPooling 4 restores more details while Upsampling 4 is more smooth.

In [10]:
def wct(content, style, alpha=0.9, eps=1e-3):
    '''
    https://github.com/eridgd/WCT-TF/blob/master/ops.py

       Perform Whiten-Color Transform on feature maps using numpy
       See p.4 of the Universal Style Transfer paper for equations:
       https://arxiv.org/pdf/1705.08086.pdf
    '''
    # 1xHxWxC -> CxHxW
    content_t = np.transpose(np.squeeze(content), (2, 0, 1))
    style_t = np.transpose(np.squeeze(style), (2, 0, 1))

    # CxHxW -> CxH*W
    content_flat = content_t.reshape(-1, content_t.shape[1]*content_t.shape[2])
    style_flat = style_t.reshape(-1, style_t.shape[1]*style_t.shape[2])

    # Whitening transform
    mc = content_flat.mean(axis=1, keepdims=True)
    fc = content_flat - mc
    fcfc = np.dot(fc, fc.T) / (content_t.shape[1]*content_t.shape[2] - 1)
    Ec, wc, _ = np.linalg.svd(fcfc)
    k_c = (wc > 1e-5).sum()
    Dc = np.diag((wc[:k_c]+eps)**-0.5)
    fc_hat = Ec[:,:k_c].dot(Dc).dot(Ec[:,:k_c].T).dot(fc)

    # Coloring transform
    ms = style_flat.mean(axis=1, keepdims=True)
    fs = style_flat - ms
    fsfs = np.dot(fs, fs.T) / (style_t.shape[1]*style_t.shape[2] - 1)
    Es, ws, _ = np.linalg.svd(fsfs)
    k_s = (ws > 1e-5).sum()
    Ds = np.sqrt(np.diag(ws[:k_s]+eps))
    fcs_hat = Es[:,:k_s].dot(Ds).dot(Es[:,:k_s].T).dot(fc_hat)
    fcs_hat = fcs_hat + ms

    # Blend transform features with original features
    blended = alpha*fcs_hat + (1 - alpha)*(fc)

    # CxH*W -> CxHxW
    blended = blended.reshape(content_t.shape)
    # CxHxW -> 1xHxWxC
    blended = np.expand_dims(np.transpose(blended, (1,2,0)), 0)

    return np.float32(blended)
In [11]:
def preprocessing_img(img_path):
    """Load an image and shape it into a 1x256x256x3 batch for the nets."""
    resized = cv2.resize(mpimg.imread(img_path), (256, 256))
    # prepend the batch dimension expected by predict()
    return np.expand_dims(resized, axis=0)
In [12]:
def tansfer(img_c, img_s, alpha):
    """Transfer the style of `img_s` onto `img_c` via WCT in feature space.

    Encodes both image batches with the globally loaded 2-layer
    upsampling encoder, mixes their statistics with `wct`, and decodes
    the result.  (Function name kept as-is — later cells call it.)
    """
    # encode content and style images
    feats_c = encoder_decoder_2.encoder.predict(img_c)
    feats_s = encoder_decoder_2.encoder.predict(img_s)

    # whiten-color transform in feature space
    feats_cs = wct(feats_c, feats_s, alpha)

    # decode back into image space
    decoded = encoder_decoder_2.decoder.predict(feats_cs)

    # drop the batch dim and force displayable integer pixels in 0-255
    decoded = np.squeeze(decoded)
    decoded = decoded.astype(int)
    decoded = np.clip(decoded, 0, 255)
    return decoded
In [13]:
def show_result(img_c_path, img_s_path, alpha, outname):
    """Run WCT style transfer and plot content / style / result side by side.

    NOTE: this redefines the earlier reconstruction `show_result`.
    Saves the raw transfer to out.jpg, and a copy resized to the style
    image's dimensions to <outname>.jpg.
    """
    out = tansfer(preprocessing_img(img_c_path),
                  preprocessing_img(img_s_path), alpha)
    imsave('out.jpg', out)

    plt.figure(figsize=(20, 20))

    plt.subplot(1, 3, 1)
    I_c = mpimg.imread(img_c_path)
    plt.imshow(I_c)
    plt.title('Content IMG')

    plt.subplot(1, 3, 2)
    I_s = mpimg.imread(img_s_path)
    plt.imshow(I_s)
    plt.title('Style IMG')

    # reload the saved output and resize it back to the style image's size
    newout = mpimg.imread('out.jpg')
    newout = cv2.resize(newout, (I_s.shape[1], I_s.shape[0]))
    imsave(outname + '.jpg', newout)

    plt.subplot(1, 3, 3)
    plt.imshow(newout)
    plt.title('Transfer IMG')
In [14]:
print('Summer Lake --> Winter Lake')
show_result('data/land/50.jpg','data/land/3.jpg',0.9,'Tansfer50')
Summer Lake --> Winter Lake
In [15]:
print('Desert Pyramid --> Oasis Pyramid')
show_result('data/land/1765.jpg','data/land/1581.jpg',0.9,'Tansfer1765')
Desert Pyramid --> Oasis Pyramid
In [16]:
print('Daytime Building --> Night Building')
show_result('data/land/2212.jpg','data/land/2226.jpg',0.5,'Tansfer2212')
Daytime Building --> Night Building
In [17]:
print('Color the background')
show_result('data/land/4944.jpg','data/land/5228.jpg',0.9,'Tansfer4944')
Color the background

The results of the whitening and coloring transform look good! However, these four pictures all have some artifacts at the edges — especially the edges of the building in the third picture and the shadows on both sides of the bridge in the first picture. Besides, some details in the pictures are missing: for example, the outline of the building's windows is not obvious. In other words, it becomes blurred.

5. Exceptional work: Implement the smoothing constraint from Li et al. to achieve photo realistic style transfer results. Show a few photo style changes from the network. If using code from another author (not your own), you will be graded on the clarity of explanatory comments you add to the code.

In this part, we will use the existing code for the smoothing constraint from https://github.com/NVIDIA/FastPhotoStyle
Specifically, we will use https://github.com/NVIDIA/FastPhotoStyle/blob/master/photo_smooth.py
We tried to understand their code, but it's very hard — if you look at the link above, they only wrote a single line of comments!

In [18]:
import scipy.misc
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
from numpy.lib.stride_tricks import as_strided
from PIL import Image

# The author created a class called Propagator to do the smoothing;
# this propagation step is the computational bottleneck of photorealistic image stylization.

class Propagator():
    """Photorealistic smoothing (propagation) step from FastPhotoStyle.

    Smooths a stylized image so that pixels which look alike in the
    original content image receive similar styles, by solving a sparse
    linear system built from the matting Laplacian of the content image.
    Adapted from
    https://github.com/NVIDIA/FastPhotoStyle/blob/master/photo_smooth.py
    This propagation is the computational bottleneck of photorealistic
    image stylization.
    """

    def __init__(self, beta=0.9999):
        # beta weighs the smoothing term against fidelity to the
        # stylized input; the reference implementation uses ~1
        super(Propagator, self).__init__()
        self.beta = beta

    def process(self, initImg, contentImg):
        """Smooth `initImg` (the stylized result) guided by `contentImg`.

        Both arguments may be file paths or image arrays; returns a PIL
        Image.  NOTE(review): relies on scipy.misc.imread/imresize,
        which were removed from modern SciPy — requires an old version.
        """
        # read content image (path or already-loaded array)
        if type(contentImg) == str:
            content = scipy.misc.imread(contentImg, mode='RGB')
        else:
            content = contentImg.copy()

        # read the un-smoothed (stylized) image as B, scaled to [0, 1]
        if type(initImg) == str:
            B = scipy.misc.imread(initImg, mode='RGB').astype(np.float64) / 255
        else:
            B = scipy.asarray(initImg).astype(np.float64) / 255

        # shape of the un-smoothed image
        h1,w1,k = B.shape
        # crop a 2-pixel border: edge pixels lack full 3x3 neighborhoods
        # for the matting Laplacian
        h = h1 - 4
        w = w1 - 4
        B = B[int((h1-h)/2):int((h1-h)/2+h),int((w1-w)/2):int((w1-w)/2+w),:]
        # finished cutting the edge

        # make the content image the same (cropped) size as B
        content = scipy.misc.imresize(content,(h,w))

        # replication-pad both images back to the original h1 x w1
        B = self.__replication_padding(B,2)

        content = self.__replication_padding(content,2)

        # scale content pixels to [0, 1] as well
        content = content.astype(np.float64)/255

        # flatten to (pixels, channels) for the per-channel linear solves
        B = np.reshape(B,(h1*w1,k))

        # affinity matrix W of the content image: the matting Laplacian of
        # Levin, Lischinski & Weiss (CVPR'06), formula (15)
        # http://webee.technion.ac.il/people/anat.levin/papers/Matting-Levin-Lischinski-Weiss-CVPR06.pdf
        W = self.__compute_laplacian(content)

        # convert to Compressed Sparse Column format
        # (duplicate entries are summed together)
        W = W.tocsc()

        # per-pixel degrees (column sums), then dd = degree^(-1/2)
        dd = W.sum(0)
        dd = np.sqrt(np.power(dd,-1))
        dd = dd.A.squeeze()

        # D = diag(degree^(-1/2)) as a sparse CSC matrix
        D = scipy.sparse.csc_matrix((dd, (np.arange(0,w1*h1), np.arange(0,w1*h1)))) # 0.026

        # S = D^(-1/2) W D^(-1/2): symmetrically normalized affinity.
        # (Written with the precomputed inverse square roots rather than
        # the paper's D**-(1/2) notation; the result is the same.)
        S = D.dot(W).dot(D)

        # A = I - beta*S; solving A V = B propagates the style
        A = scipy.sparse.identity(w1*h1) - self.beta*S
        A = A.tocsc()
        # pre-factorize A once so the three per-channel solves are cheap;
        # the returned callable solves A x = rhs for a (N,) rhs
        solver = scipy.sparse.linalg.factorized(A)
        # solve channel by channel, then rescale by (1 - beta)
        V = np.zeros((h1*w1,k))
        V[:,0] = solver(B[:,0])
        V[:,1] = solver(B[:,1])
        V[:,2] = solver(B[:,2])
        V = V*(1-self.beta)
        V = V.reshape(h1,w1,k)
        # crop the replication padding back off
        V = V[2:2+h,2:2+w,:]

        img = Image.fromarray(np.uint8(np.clip(V * 255., 0, 255.)))
        return img

    # Returns the sparse matting Laplacian of `img` as an (h*w, h*w) matrix
    def __compute_laplacian(self, img, eps=10**(-7), win_rad=1):
            # pixels per window: (2*1+1)^2 == 9
            win_size = (win_rad*2+1)**2
            # content image shape
            h, w, d = img.shape

            # interior size: windows are only centered on non-edge pixels
            c_h, c_w = h - 2*win_rad, w - 2*win_rad

            win_diam = win_rad*2+1 # window side length, here 3

            # linear index of every pixel, laid out as the image grid,
            # e.g. np.arange(2*3).reshape((2, 3)) ->
            # [[0, 1, 2],
            #  [3, 4, 5]]
            indsM = np.arange(h*w).reshape((h, w))
            # flatten to (pixels, channels)
            ravelImg = img.reshape(h*w, d)

            # all 3x3 sliding windows of pixel indices (zero-copy view)
            win_inds = self.__rolling_block(indsM, block=(win_diam, win_diam))
            # flatten each sliding window to 9 indices
            win_inds = win_inds.reshape(c_h, c_w, win_size)
            winI = ravelImg[win_inds]

            # mu and var: mean and covariance of the colors in the
            # window w_k around each pixel k
            win_mu = np.mean(winI, axis=2, keepdims=True)
            win_var = np.einsum('...ji,...jk ->...ik', winI, winI)/win_size - np.einsum('...ji,...jk ->...ik', win_mu, win_mu)

            # regularized inverse covariance per window
            inv = np.linalg.inv(win_var + (eps/win_size)*np.eye(3))
            # per-window affinity values, formula (15) of Levin et al.,
            # computed as batched matrix products via einsum
            X = np.einsum('...ij,...jk->...ik', winI - win_mu, inv)
            vals = (1/win_size)*(1 + np.einsum('...ij,...kj->...ik', X, winI - win_mu))

            # scatter the per-window values into a sparse (h*w, h*w)
            # matrix; tile/repeat build all (row, col) index pairs and
            # ravel() flattens them to 1-D
            nz_indsCol = np.tile(win_inds, win_size).ravel()
            nz_indsRow = np.repeat(win_inds, win_size).ravel()
            nz_indsVal = vals.ravel()
            L = scipy.sparse.coo_matrix((nz_indsVal, (nz_indsRow, nz_indsCol)), shape=(h*w, h*w))
            return L

    # Replication-pad `arr` by `pad` pixels per side, channel by channel,
    # so edge pixels also get full 3x3 neighborhoods.
    def __replication_padding(self, arr,pad):
            h,w,c = arr.shape
            ans = np.zeros((h+pad*2,w+pad*2,c))
            for i in range(c):
                    ans[:,:,i] = np.pad(arr[:,:,i],pad_width=(pad,pad),mode='edge')
            return ans

    # Zero-copy view of all block-shaped sliding windows of A
    # (see the exploratory cells below for a worked example).
    def __rolling_block(self, A, block=(3, 3)):
        shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block
        strides = (A.strides[0], A.strides[1]) + A.strides
        return as_strided(A, shape=shape, strides=strides)
#########################

The test code below is used to understand the Propagator code. I extracted the functions from the Propagator class, ran them in isolation, and inspected their outputs.
Feel free to skip it.

In [19]:
indsM = np.arange(4*4).reshape((4, 4))
indsM
Out[19]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [20]:
img=cv2.imread('data/land/50.jpg')
ravelImg = img.reshape(180*320, 3)
ravelImg.shape
Out[20]:
(57600, 3)
In [21]:
x = np.arange(5*4).reshape((5, 4))
print(x)
print(x.strides)
# default is int64 so each element is 8byte. and there are 4 col so it is 4x8
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]
(32, 8)
In [22]:
## !! this function is used to create sliding windows!!
def rolling_block(A, block=(3, 3)):
    """Return a zero-copy view of all block-shaped sliding windows of A."""
    print("A.shape", A.shape)
    # one window per valid top-left corner, each of shape `block`
    out_shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block
    print('shape', out_shape)
    print((A.strides[0], A.strides[1]))
    print(A.strides)
    # stepping a window by one row/col moves by A's own row/col strides
    out_strides = (A.strides[0], A.strides[1]) + A.strides
    print(out_strides)
    return as_strided(A, shape=out_shape, strides=out_strides)
RB=rolling_block(indsM)
print(indsM)
print(RB)
print(RB.shape)
A.shape (4, 4)
shape (2, 2, 3, 3)
(32, 8)
(32, 8)
(32, 8, 32, 8)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[[[ 0  1  2]
   [ 4  5  6]
   [ 8  9 10]]

  [[ 1  2  3]
   [ 5  6  7]
   [ 9 10 11]]]


 [[[ 4  5  6]
   [ 8  9 10]
   [12 13 14]]

  [[ 5  6  7]
   [ 9 10 11]
   [13 14 15]]]]
(2, 2, 3, 3)
In [23]:
eps=10**(-7)
win_rad=1
img=np.arange(5*4*3).reshape((5, 4,3))
print(img.shape)
win_size = (win_rad*2+1)**2
# get content image shape
h, w, d = img.shape

# c_h= h-2 c_w=w-2
# still cut the edge
c_h, c_w = h - 2*win_rad, w - 2*win_rad

win_diam = win_rad*2+1 # win_diam= 3

# for example  indsM = np.arange(2*3).reshape((2, 3)) produce
# array([[0, 1, 2],
#        [3, 4, 5]])     so this code get index for each pixel
indsM = np.arange(h*w).reshape((h, w))
# flatten each channel  
ravelImg = img.reshape(h*w, d)

#create sliding windows
print('indsM',indsM)
win_inds = rolling_block(indsM, block=(win_diam, win_diam))
# flatten each sliding window
win_inds = win_inds.reshape(c_h, c_w, win_size)
print(win_inds)
winI = ravelImg[win_inds]
print(winI)
(5, 4, 3)
indsM [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]
A.shape (5, 4)
shape (3, 2, 3, 3)
(32, 8)
(32, 8)
(32, 8, 32, 8)
[[[ 0  1  2  4  5  6  8  9 10]
  [ 1  2  3  5  6  7  9 10 11]]

 [[ 4  5  6  8  9 10 12 13 14]
  [ 5  6  7  9 10 11 13 14 15]]

 [[ 8  9 10 12 13 14 16 17 18]
  [ 9 10 11 13 14 15 17 18 19]]]
[[[[ 0  1  2]
   [ 3  4  5]
   [ 6  7  8]
   [12 13 14]
   [15 16 17]
   [18 19 20]
   [24 25 26]
   [27 28 29]
   [30 31 32]]

  [[ 3  4  5]
   [ 6  7  8]
   [ 9 10 11]
   [15 16 17]
   [18 19 20]
   [21 22 23]
   [27 28 29]
   [30 31 32]
   [33 34 35]]]


 [[[12 13 14]
   [15 16 17]
   [18 19 20]
   [24 25 26]
   [27 28 29]
   [30 31 32]
   [36 37 38]
   [39 40 41]
   [42 43 44]]

  [[15 16 17]
   [18 19 20]
   [21 22 23]
   [27 28 29]
   [30 31 32]
   [33 34 35]
   [39 40 41]
   [42 43 44]
   [45 46 47]]]


 [[[24 25 26]
   [27 28 29]
   [30 31 32]
   [36 37 38]
   [39 40 41]
   [42 43 44]
   [48 49 50]
   [51 52 53]
   [54 55 56]]

  [[27 28 29]
   [30 31 32]
   [33 34 35]
   [39 40 41]
   [42 43 44]
   [45 46 47]
   [51 52 53]
   [54 55 56]
   [57 58 59]]]]
In [24]:
img.shape
Out[24]:
(5, 4, 3)
In [25]:
img.sum(0).shape
Out[25]:
(4, 3)
In [26]:
a = np.arange(25).reshape(5,5)
a
Out[26]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])
In [27]:
print(np.einsum('ii->i', a))# Extract the diagonal (requires explicit form):
print(np.einsum('ij->i', a)) # Sum over an axis (requires explicit form):
print(np.einsum('...j->...', a))
[ 0  6 12 18 24]
[ 10  35  60  85 110]
[ 10  35  60  85 110]
################ Finish Testing
In [28]:
def show_result_after_smooth(initImg, contentImg):
    """Show the stylized image before and after photorealistic smoothing."""
    plt.figure(figsize=(20, 20))

    # left panel: the raw style-transfer output
    plt.subplot(1, 2, 1)
    plt.imshow(mpimg.imread(initImg))
    plt.title('Tansfer IMG')

    # right panel: the same image after matting-Laplacian propagation
    plt.subplot(1, 2, 2)
    smooth = Propagator().process(initImg, contentImg)
    plt.imshow(smooth)
    plt.title('Tansfer IMG after smooth')
In [29]:
show_result_after_smooth('Tansfer50.jpg','data/land/50.jpg')
In [30]:
show_result_after_smooth('Tansfer1765.jpg','data/land/1765.jpg')
In [31]:
show_result_after_smooth('Tansfer2212.jpg','data/land/2212.jpg')
In [32]:
show_result_after_smooth('Tansfer4944.jpg','data/land/4944.jpg')

The results are amazing! The edges look more natural! And it restored more details: see the text in the bus and the building behind the bus!